{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import json\n",
    "import pickle\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "from IPython.display import display, HTML\n",
    "import os,sys,inspect, pickle\n",
    "import numpy as np\n",
    "\n",
    "from utils import *\n",
    "\n",
    "pd.set_option('display.max_colwidth', 500)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "def create_empty_base_df():\n",
    "    data = {\n",
    "        'Algorithm': [],\n",
    "        'Runtime total': [],\n",
    "        'Runtime index simulation': [],\n",
    "        'Runtime cost requests': [],\n",
    "        'Cost Requests Total': [],\n",
    "        'Not cached': [],\n",
    "        'Cost Requests Cached': [],\n",
    "        'Evaluated Configurations': [],\n",
    "        'Indexes Simulated': []\n",
    "    }\n",
    "    \n",
    "    return pd.DataFrame(data, columns = data.keys())\n",
    "\n",
    "\n",
    "def best_row_fitting_budget(df, budget):\n",
    "    df['original idx'] = df.index\n",
    "    df = df[df['memory consumption'] < budget]\n",
    "    if len(get_costs(df)) < 1:\n",
    "        return None\n",
    "    row_with_best_configuration = get_costs(df).index(min(get_costs(df)))\n",
    "    return df.iloc[row_with_best_configuration]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "CSV_PATH = '../tpcds_wo_4_6_9_10_11_32_35_41_95/'\n",
    "BENCHMARK = 'tpcds'\n",
    "SCALE_FACTOR = 10\n",
    "# all except queries that dominate costs by orders of magnitude\n",
    "QUERIES = [1, 2, 3, 5, 7, 8, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 34, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 96, 97, 98, 99]\n",
    "\n",
    "BUDGETS_B = [gb_to_b(5)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "result = create_empty_base_df()\n",
    "budget = BUDGETS_B[0]\n",
    "for algorithm in ALGORITHMS:\n",
    "    if algorithm == 'cophy' or algorithm == 'dexter':\n",
    "        continue\n",
    "        \n",
    "    style = styles[algorithm]\n",
    "    csv_path= f\"{CSV_PATH}/results_{algorithm}_{BENCHMARK}_{len(QUERIES)}_queries.csv\"\n",
    "    try:\n",
    "        df = pd.read_csv(csv_path, sep=';')\n",
    "    except:\n",
    "        continue\n",
    "    if algorithm == 'dexter':\n",
    "        df = df.iloc[::-1]\n",
    "    best_fitting_row = best_row_fitting_budget(df, budget)\n",
    "    if best_fitting_row is not None:\n",
    "        print(best_fitting_row['parameters'])\n",
    "        unique_workload_evaluations = int((best_fitting_row['cost requests'] - best_fitting_row['cache hits']) / len(QUERIES))\n",
    "        result = result.append({\n",
    "            'Algorithm': style.label,\n",
    "            'Runtime total': f\"{s_to_m(best_fitting_row['algorithm runtime']):.2f}\",\n",
    "            'Runtime index simulation': f\"{best_fitting_row['algorithm index creation time'] / best_fitting_row['algorithm runtime'] * 100:.2f}\",\n",
    "            'Runtime cost requests': f\"{best_fitting_row['algorithm cost time'] / best_fitting_row['algorithm runtime'] * 100:.2f}\",\n",
    "            'Cost Requests Total': f\"{best_fitting_row['cost requests']:d}\",\n",
    "            'Not cached': f\"{best_fitting_row['cost requests']-best_fitting_row['cache hits']:d}\",\n",
    "            'Cost Requests Cached': f\"{best_fitting_row['cache hits'] / best_fitting_row['cost requests'] * 100 if best_fitting_row['cache hits'] > 0 else 0 : .2f}\",\n",
    "            'Evaluated Configurations': f\"{unique_workload_evaluations:d}\",\n",
    "            'Indexes Simulated': f\"{best_fitting_row['algorithm created #indexes']:d}\",\n",
    "        }, ignore_index=True)\n",
    "\n",
    "# Dexter must be hardcoded from manual observations because we do not instrument it yet.\n",
    "dexter_hypo_index_creations = {\n",
    "    'tpch': 451,\n",
    "    'job': 291,\n",
    "    'tpcds': 3982\n",
    "}\n",
    "result = result.append({\n",
    "    'Algorithm': styles['dexter'].label,\n",
    "    'Runtime total': f\"{s_to_m(25.17):.2f}\",\n",
    "    'Runtime index simulation': \"-\",\n",
    "    'Runtime cost requests': \"-\",\n",
    "    'Cost Requests Total': f\"{2*len(QUERIES)}\",\n",
    "    'Not cached': 180,\n",
    "    'Cost Requests Cached': f\"{0.00:.2f}\",\n",
    "    'Evaluated Configurations': 2,\n",
    "    'Indexes Simulated': f\"{dexter_hypo_index_creations[BENCHMARK]}\",\n",
    "}, ignore_index=True)\n",
    "result = result.sort_values(by=['Algorithm'])\n",
    "result"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
